% Exported BibTeX record (text outside an entry is ignored by BibTeX).

@comment{
  Conference paper (SIBGRAPI, 2023) presenting the Self-Supervised
  Object-Centric (SSOC) feature-extraction methodology; see the abstract field.

  Field notes:
    - url holds the DOI link. The URLib repository link is stored in the
      non-standard field ibi-url, because a field name may occur only once
      per entry (BibTeX drops a repeated field with a warning).
    - booktitle was expanded from the export placeholder "Proceedings..."
      using the conference name given in the organization field.
      NOTE(review): confirm the wording against the published proceedings.
    - affiliation, conference-location, conference-year, ibi, ibi-url,
      targetfile and urlaccessdate are not standard BibTeX fields; they are
      kept as repository metadata.
    - NOTE(review): the last author is stored with surname "Junior"; confirm
      whether it should instead be treated as a name suffix.
}
@inproceedings{PaulaSalvSilvJr:2023:SeFeEx,
  author              = {de Paula, Davi Duarte and
                         Salvadeo, Denis Henrique Pinheiro and
                         Silva, Lucas Brito and
                         Junior, Uemerson Pinheiro},
  affiliation         = {Institute of Geosciences and Exact Sciences, S{\~a}o Paulo State
                         University and Institute of Geosciences and Exact Sciences,
                         S{\~a}o Paulo State University and Institute of Geosciences and
                         Exact Sciences, S{\~a}o Paulo State University and Institute of
                         Geosciences and Exact Sciences, S{\~a}o Paulo State University},
  title               = {Self-Supervised Feature Extraction for Video Surveillance Anomaly
                         Detection},
  booktitle           = {Proceedings of the 36th Conference on Graphics, Patterns and
                         Images ({SIBGRAPI})},
  year                = {2023},
  editor              = {Clua, Esteban Walter Gonzalez and
                         K{\"o}rting, Thales Sehn and
                         Paulovich, Fernando Vieira and
                         Feris, Rogerio},
  organization        = {Conference on Graphics, Patterns and Images, 36. (SIBGRAPI)},
  keywords            = {video surveillance, anomaly detection, feature extraction, deep
                         learning, self-supervised learning},
  abstract            = {The recent studies on Video Surveillance Anomaly Detection focus
                         only on the training methodology, utilizing pre-extracted feature
                         vectors from videos. They give little attention to methodologies
                         for feature extraction, which could enhance the final anomaly
                         detection quality. Thus, this work presents a self-supervised
                         methodology named Self-Supervised Object-Centric (SSOC) for
                         extracting features from the relationship between objects in
                         videos. To achieve this, a pretext task is employed to predict the
                         future position and appearance of a reference object based on a
                         set of past frames. The Deep Learning-based model used in the
                         pretext task is then fine-tuned on Weak Supervised datasets for
                         the downstream task, using the Multiple Instance Learning training
                         strategy, with the goal of detecting anomalies in the videos. In
                         the best case scenario, the results demonstrate an increase of
                         3.1\% in AUC on the UCF Crime dataset and an increase of 2.8\%
                         in AUC on the CamNuvem dataset.},
  conference-location = {Rio Grande, RS},
  conference-year     = {Nov. 06-09, 2023},
  doi                 = {10.1109/SIBGRAPI59091.2023.10347173},
  url                 = {https://doi.org/10.1109/SIBGRAPI59091.2023.10347173},
  language            = {en},
  ibi                 = {8JMKD3MGPEW34M/49L86LH},
  ibi-url             = {http://urlib.net/ibi/8JMKD3MGPEW34M/49L86LH},
  targetfile          = {depaula-27-without-copyright.pdf},
  urlaccessdate       = {2024, May 05},
}


% End of exported record.